Take-home-project

Geospatial Proj

Published

November 27, 2023

Modified

December 1, 2023

Getting Started

Lets make make sure that sfdepsftmap and tidyverse packages of R are currently installed 

pacman::p_load(sf, sfdep, tmap, tidyverse, knitr, plotly, zoo, Kendall, mapview)

Importing OD data into R

Firstly we will import the Passenger volume by Origin Destination Bus Stops data set downloaded from LTA DataMall by using read_csv() of readr package.

odbus <- read_csv("data/aspatial/origin_destination_bus_202308.csv")
glimpse(odbus)
Rows: 5,709,512
Columns: 7
$ YEAR_MONTH          <chr> "2023-08", "2023-08", "2023-08", "2023-08", "2023-…
$ DAY_TYPE            <chr> "WEEKDAY", "WEEKENDS/HOLIDAY", "WEEKENDS/HOLIDAY",…
$ TIME_PER_HOUR       <dbl> 16, 16, 14, 14, 17, 17, 17, 17, 7, 17, 14, 10, 10,…
$ PT_TYPE             <chr> "BUS", "BUS", "BUS", "BUS", "BUS", "BUS", "BUS", "…
$ ORIGIN_PT_CODE      <chr> "04168", "04168", "80119", "80119", "44069", "4406…
$ DESTINATION_PT_CODE <chr> "10051", "10051", "90079", "90079", "17229", "1722…
$ TOTAL_TRIPS         <dbl> 7, 2, 3, 10, 5, 4, 3, 22, 3, 3, 7, 1, 3, 1, 3, 1, …

Origin & Destination Bus Stop Code

odbus$ORIGIN_PT_CODE <-
as.factor(odbus$ORIGIN_PT_CODE)
odbus$DESTINATION_PT_CODE <-
as.factor(odbus$DESTINATION_PT_CODE)

Extracting study data

We will filter out the data according to the requirements set out by Professor 1.) “Weekday @ 6-9am”

origtrip_6_9 <- odbus %>%
  filter(DAY_TYPE == "WEEKDAY") %>%
  filter(TIME_PER_HOUR >= 6 &
           TIME_PER_HOUR <= 9) %>%
  group_by(ORIGIN_PT_CODE) %>%
  summarise(TRIPS = sum(TOTAL_TRIPS))
kable(head(origtrip_6_9))
ORIGIN_PT_CODE TRIPS
01012 1973
01013 952
01019 1789
01029 2561
01039 2938
01059 1651
write_rds(origtrip_6_9, "data/rds/origtrip_6_9.rds")

2.) “Weekday @ 5-8pm”

origtrip_17_20 <- odbus %>%
  filter(DAY_TYPE == "WEEKDAY") %>%
  filter(TIME_PER_HOUR >= 17 &
           TIME_PER_HOUR <= 20) %>%
  group_by(ORIGIN_PT_CODE) %>%
  summarise(TRIPS = sum(TOTAL_TRIPS))
glimpse(origtrip_17_20)
Rows: 5,035
Columns: 2
$ ORIGIN_PT_CODE <fct> 01012, 01013, 01019, 01029, 01039, 01059, 01109, 01112,…
$ TRIPS          <dbl> 8448, 7328, 3608, 9317, 12937, 2133, 322, 45010, 27233,…
kable(head(origtrip_17_20))
ORIGIN_PT_CODE TRIPS
01012 8448
01013 7328
01019 3608
01029 9317
01039 12937
01059 2133
write_rds(origtrip_17_20, "data/rds/origtrip_17_20.rds")

3.) “Weekend @ 11am-2pm”

origtrip_11_14 <- odbus %>%
  filter(DAY_TYPE == "WEEKENDS/HOLIDAY") %>%
  filter(TIME_PER_HOUR >= 11 &
           TIME_PER_HOUR <= 14) %>%
  group_by(ORIGIN_PT_CODE) %>%
  summarise(TRIPS = sum(TOTAL_TRIPS))
kable(head(origtrip_11_14))
ORIGIN_PT_CODE TRIPS
01012 2273
01013 1697
01019 1511
01029 3272
01039 5424
01059 1062
write_rds(origtrip_11_14, "data/rds/origtrip_11_14.rds")

4.) “Weekend @ 4-7pm”

origtrip_16_19 <- odbus %>%
  filter(DAY_TYPE == "WEEKENDS/HOLIDAY") %>%
  filter(TIME_PER_HOUR >= 16 &
           TIME_PER_HOUR <= 19) %>%
  group_by(ORIGIN_PT_CODE) %>%
  summarise(TRIPS = sum(TOTAL_TRIPS))
kable(head(origtrip_16_19))
ORIGIN_PT_CODE TRIPS
01012 3208
01013 2796
01019 1623
01029 4244
01039 7403
01059 1190
write_rds(origtrip_16_19, "data/rds/origtrip_16_19.rds")
origtrip_11_14 <- read_rds("data/rds/origtrip_11_14.rds")
origtrip_16_19 <- read_rds("data/rds/origtrip_16_19.rds")
origtrip_17_20 <- read_rds("data/rds/origtrip_17_20.rds")
origtrip_6_9 <- read_rds("data/rds/origtrip_6_9.rds")

Importing geospatial data

busstop <- st_read(dsn = "data/geospatial",
                   layer = "BusStop") %>%
  st_transform(crs = 3414)
Reading layer `BusStop' from data source 
  `/Users/youting/ytquek/ISSS624/Project/data/geospatial' using driver `ESRI Shapefile'
Simple feature collection with 5161 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21

Glimpse the Bus Stop tibble data frame

glimpse(busstop)
Rows: 5,161
Columns: 4
$ BUS_STOP_N <chr> "22069", "32071", "44331", "96081", "11561", "66191", "2338…
$ BUS_ROOF_N <chr> "B06", "B23", "B01", "B05", "B05", "B03", "B02A", "B02", "B…
$ LOC_DESC   <chr> "OPP CEVA LOGISTICS", "AFT TRACK 13", "BLK 239", "GRACE IND…
$ geometry   <POINT [m]> POINT (13576.31 32883.65), POINT (13228.59 44206.38),…

Load Map into MPSZ

mpsz <- st_read(dsn = "data/geospatial",
                   layer = "MPSZ-2019") %>%
  st_transform(crs = 3414)
Reading layer `MPSZ-2019' from data source 
  `/Users/youting/ytquek/ISSS624/Project/data/geospatial' using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS:  WGS 84
glimpse(mpsz)
Rows: 332
Columns: 7
$ SUBZONE_N  <chr> "MARINA EAST", "INSTITUTION HILL", "ROBERTSON QUAY", "JURON…
$ SUBZONE_C  <chr> "MESZ01", "RVSZ05", "SRSZ01", "WISZ01", "MUSZ02", "MPSZ05",…
$ PLN_AREA_N <chr> "MARINA EAST", "RIVER VALLEY", "SINGAPORE RIVER", "WESTERN …
$ PLN_AREA_C <chr> "ME", "RV", "SR", "WI", "MU", "MP", "WI", "WI", "SI", "SI",…
$ REGION_N   <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REGION", "WEST…
$ REGION_C   <chr> "CR", "CR", "CR", "WR", "CR", "CR", "WR", "WR", "CR", "CR",…
$ geometry   <MULTIPOLYGON [m]> MULTIPOLYGON (((33222.98 29..., MULTIPOLYGON (…

Geospatial Data Wrangling

Combining Busstop and mpsz

busstop_mpsz <- st_intersection(busstop, mpsz) %>%
  select(BUS_STOP_N, SUBZONE_C) %>%
  st_drop_geometry()
glimpse(busstop_mpsz)
Rows: 5,156
Columns: 2
$ BUS_STOP_N <chr> "13099", "13089", "06151", "13211", "13139", "13109", "1311…
$ SUBZONE_C  <chr> "RVSZ05", "RVSZ05", "SRSZ01", "SRSZ01", "SRSZ01", "SRSZ01",…
write_rds(busstop_mpsz, "data/rds/busstop_mpsz.csv")  
origin_6_9 <- left_join(origtrip_6_9 , busstop_mpsz,
            by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
  rename(ORIGIN_BS = ORIGIN_PT_CODE,
         ORIGIN_SZ = SUBZONE_C) %>%
  group_by(ORIGIN_SZ) %>%
  summarise(TOT_TRIPS = sum(TRIPS))
glimpse(origin_6_9)
Rows: 312
Columns: 2
$ ORIGIN_SZ <chr> "AMSZ01", "AMSZ02", "AMSZ03", "AMSZ04", "AMSZ05", "AMSZ06", …
$ TOT_TRIPS <dbl> 116600, 226521, 199437, 114549, 70770, 102390, 23231, 28011,…
origin_17_20 <- left_join(origtrip_17_20 , busstop_mpsz,
            by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
  rename(ORIGIN_BS = ORIGIN_PT_CODE,
         ORIGIN_SZ = SUBZONE_C) %>%
  group_by(ORIGIN_SZ) %>%
  summarise(TOT_TRIPS = sum(TRIPS))
origin_11_14 <- left_join(origtrip_11_14 , busstop_mpsz,
            by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
  rename(ORIGIN_BS = ORIGIN_PT_CODE,
         ORIGIN_SZ = SUBZONE_C) %>%
  group_by(ORIGIN_SZ) %>%
  summarise(TOT_TRIPS = sum(TRIPS))
origin_16_19 <- left_join(origtrip_16_19 , busstop_mpsz,
            by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
  rename(ORIGIN_BS = ORIGIN_PT_CODE,
         ORIGIN_SZ = SUBZONE_C) %>%
  group_by(ORIGIN_SZ) %>%
  summarise(TOT_TRIPS = sum(TRIPS))

Setting up the Hexagon Grid

Drawing the Hexagon Grid

Drawing the hexagon grid over the mpsz map

area_honeycomb_grid = st_make_grid(mpsz, c(250, 250), what = "polygons", square = FALSE)

To sf and add grid ID

honeycomb_grid_sf = st_sf(area_honeycomb_grid) %>%
  mutate(grid_id = 1:length(lengths(area_honeycomb_grid)))

Determine which bus stops is contained within which hexagon using st_within.

busstop_honeycomb <- st_intersection(honeycomb_grid_sf,busstop) %>%
  select(BUS_STOP_N, grid_id) %>%
  st_drop_geometry()
write_rds(busstop_honeycomb, "data/rds/busstop_honeycomb.csv")

Check for duplicate records

duplicate <- busstop_honeycomb %>%
  group_by_all() %>%
  filter(n()>1) %>%
  ungroup()
busstop_honeycomb <- unique(busstop_honeycomb)

remove grid without value of 0 (i.e. no points in side that grid)

busstop_honeycomb <- busstop_honeycomb %>% filter(!is.na(grid_id) & grid_id > 0)

Assign every Bus Stop with a Grid ID

origin_6_9 <- left_join(busstop_honeycomb, origtrip_6_9,
            by = c("BUS_STOP_N" = "ORIGIN_PT_CODE")) 

origin_6_9 <- origin_6_9 %>%
  filter(!is.na(TRIPS) & TRIPS > 0)


origin_17_20 <- left_join(busstop_honeycomb, origtrip_17_20,
            by = c("BUS_STOP_N" = "ORIGIN_PT_CODE")) 

origin_17_20 <- origin_17_20 %>%
  filter(!is.na(TRIPS) & TRIPS > 0)


origin_11_14 <- left_join(busstop_honeycomb, origtrip_11_14,
            by = c("BUS_STOP_N" = "ORIGIN_PT_CODE")) 

origin_11_14 <- origin_11_14 %>%
  filter(!is.na(TRIPS) & TRIPS > 0)


origin_16_19 <- left_join(busstop_honeycomb, origtrip_16_19,
            by = c("BUS_STOP_N" = "ORIGIN_PT_CODE")) 

origin_16_19 <- origin_16_19 %>%
  filter(!is.na(TRIPS) & TRIPS > 0)

Choropleth Visualisation

Weekday Morning Peak 6am-9am

total_trips_by_grid <- origin_6_9 %>%
  group_by(grid_id) %>%
  summarise(total_trips = sum(TRIPS, na.rm = TRUE))

Merge geospatial data

total_trips_by_grid <- total_trips_by_grid %>%
  left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))

total_trips_by_grid_sf <- st_sf(total_trips_by_grid)

Plot the Choropleth map

tmap_mode("view")

map_honeycomb = tm_shape(total_trips_by_grid_sf) +
  tm_fill(
    col = "total_trips",
    palette = "Reds",
    style = "cont",
    title = "Total Trips Taken - Weekday Morning Peak 6-9am",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.vars = c(
      "Number of trips: " = "total_trips"
    ),
    popup.format = list(
      total_trips = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.4)

map_honeycomb

Weekday Afternoon Peak 5pm - 8pm

total_trips_by_grid <- origin_17_20 %>%
  group_by(grid_id) %>%
  summarise(total_trips = sum(TRIPS, na.rm = TRUE))

Merge geospatial data

total_trips_by_grid <- total_trips_by_grid %>%
  left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))

total_trips_by_grid_sf <- st_sf(total_trips_by_grid)

Plot the Choropleth map

tmap_mode("view")

map_honeycomb = tm_shape(total_trips_by_grid_sf) +
  tm_fill(
    col = "total_trips",
    palette = "Reds",
    style = "cont",
    title = "Total Trips Taken - Weekday Morning Peak 6-9am",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.vars = c(
      "Number of trips: " = "total_trips"
    ),
    popup.format = list(
      total_trips = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.4)

map_honeycomb

Weekday/Weekend Morning Peak 11am-2pm

total_trips_by_grid <- origin_11_14 %>%
  group_by(grid_id) %>%
  summarise(total_trips = sum(TRIPS, na.rm = TRUE))

Merge geospatial data

total_trips_by_grid <- total_trips_by_grid %>%
  left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))

total_trips_by_grid_sf <- st_sf(total_trips_by_grid)

Plot the Choropleth map

tmap_mode("view")

map_honeycomb = tm_shape(total_trips_by_grid_sf) +
  tm_fill(
    col = "total_trips",
    palette = "Reds",
    style = "cont",
    title = "Total Trips Taken - Weekday Morning Peak 6-9am",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.vars = c(
      "Number of trips: " = "total_trips"
    ),
    popup.format = list(
      total_trips = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.4)

map_honeycomb

Weekend/Holiday Peak 4pm-7pm

total_trips_by_grid <- origin_16_19 %>%
  group_by(grid_id) %>%
  summarise(total_trips = sum(TRIPS, na.rm = TRUE))

Merge geospatial data

total_trips_by_grid <- total_trips_by_grid %>%
  left_join(honeycomb_grid_sf, by = c("grid_id" = "grid_id"))

total_trips_by_grid_sf <- st_sf(total_trips_by_grid)

Plot the Choropleth map

tmap_mode("view")

map_honeycomb = tm_shape(total_trips_by_grid_sf) +
  tm_fill(
    col = "total_trips",
    palette = "Reds",
    style = "cont",
    title = "Total Trips Taken - Weekday Morning Peak 6-9am",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.vars = c(
      "Number of trips: " = "total_trips"
    ),
    popup.format = list(
      total_trips = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.4)

map_honeycomb

Local Indicators of Spatial Association (LISA) Analysis